//************************************************************
//
// DATASTEP 2. COMPUTING FIRM TECHNO DISTANCE
//
// DESCRIPTION: COMPUTING KNOWLEDGE DISTANCE
//
// AUTHOR: LIONEL NESTA
//
// DATASOURCE: USPTO PATENT PER FIRM-YEAR + PATENT-TEK
//
//*************************************************************


// PREAMBLE
// Start from an empty session (data, matrices, Mata objects) and switch off
// output paging so the do-file runs unattended.
// NOTE(review): the memory and matsize settings below look like tuning for
// older Stata releases -- confirm whether the target version still needs them.

clear
clear matrix 
clear mata
set more off
set mem 600m
set matsize 800

// UPLOADING DATA
// Attach the technology classes stored in pat_ec_all.dta (variable ec) to the
// firm-patent records of firm_pat.dta, matching on patnum. Master records
// without a match are kept by the merge; records whose ec is empty are then
// dropped, and ec is renamed tek for the rest of the script.
// NOTE(review): mmerge is not a built-in command (presumably the user-written
// package of that name) -- confirm it is installed and that type(n:n) pairs
// records the way this analysis expects.

use firm_pat.dta, clear
mmerge patnum using pat_ec_all.dta, type(n:n) unmatched(master)
drop _merge
drop if ec==""
rename ec tek
// firm_name may or may not be present, hence the capture
cap drop firm_name


// EXPANDING  YEARS
// Every record is replaced by five copies, and inter is used as an offset so
// that year becomes year + inter - 1; records outside 1968-2005 are discarded.
// NOTE(review): seq is a user-written command; presumably it numbers the
// copies 1..5 within each (patnum, tek, gvkey) group, so that a patent also
// counts in the four years following its own -- confirm. If a group is not
// unique before expand, the offsets would presumably run above 5.
	
expand 5
seq inter, by(patnum tek gvkey)
replace year = year + inter - 1
drop inter
drop if year > 2005
drop if year < 1968
	
// COLLAPSING
// nbpat = number of non-missing patnum records per firm (gvkey), year and
// technology class (tek).

collapse (count) nbpat = patnum, by(gvkey year tek)

// RESHAPING
// One row per firm-year with one nbpat<tek> column per technology class.
// tek is a string, so its values become the variable-name suffixes.

reshape wide nbpat , i(gvkey year) j(tek) string

// SAVING
// Intermediate firm-year by technology table, written to the working
// directory; it is reloaded by the year loop further down.

save temp.dta, replace


// MATA FUNCTION PROGRAMME FOR COSINE COMPUTATION

capture mata: mata drop cosine()
mata:
mata set matastrict on

// cosine(): pairwise technological proximity between firms.
//
// Input
//   allval : one row per firm. Column 1 is the firm identifier; columns
//            2..cols(allval) are that firm's patent counts per technology
//            class. Missing counts (.) are treated as zero.
//
// Output (written into the CURRENT Stata dataset through a view)
//   The dataset must already contain at least four numeric variables and at
//   least rows(allval)^2 observations. For every ordered pair of firms (i, j),
//   including i == j, one observation is filled:
//     variable 1 : identifier of firm i
//     variable 2 : identifier of firm j
//     variable 3 : uncentred cosine between the two technology vectors
//                  (0 when either vector is all zeros, because invsym() of a
//                  zero scalar returns 0)
//     variable 4 : Pearson correlation between the two technology vectors
//                  (missing when either vector has zero variance)
//
// Returns nothing.
void cosine(real matrix allval)
{
	// DECLARING OBJECTS

	real matrix    C, R
	real rowvector Fi, Fj          // row slices of C, hence row vectors
	real scalar    nbfirm, nbcol, ntek
	real scalar    i, j, counter
	real scalar    ssi, ssj, si, sj, cross
	real scalar    cosval, rho

	C      = allval                // work on a copy: missing values are edited below
	nbfirm = rows(C)
	nbcol  = cols(C)
	ntek   = nbcol - 1             // number of technology classes (column 1 is the firm id)

	// GETTING THE OUTPUT VIEW FROM STATA

	st_view(R = ., ., .)

	// Fail early with a readable message rather than a subscript error
	if (nbfirm > 0 & nbcol < 2) {
		_error("cosine(): allval needs a firm id column plus at least one technology column")
	}
	if (cols(R) < 4 | rows(R) < nbfirm * nbfirm) {
		_error("cosine(): current dataset must hold 4 variables and rows(allval)^2 observations")
	}

	// REPLACING MISSING COUNTS BY ZEROS (technology columns only)

	for (i = 1; i <= nbfirm; i++) {
		for (j = 2; j <= nbcol; j++) {
			if (C[i, j] == .) {
				C[i, j] = 0
			}
		}
	}

	// COMPUTING COSINE AND CORRELATION FOR EVERY ORDERED PAIR

	counter = 1

	for (i = 1; i <= nbfirm; i++) {

		Fi  = C[i, 2..nbcol]
		ssi = Fi * Fi'             // sum of squares of firm i
		si  = sum(Fi)              // total patent count of firm i

		for (j = 1; j <= nbfirm; j++) {

			Fj    = C[j, 2..nbcol]
			ssj   = Fj * Fj'
			sj    = sum(Fj)
			cross = Fi * Fj'

			// invsym() on a scalar is 1/x, or 0 when x == 0
			cosval = cross * invsym(sqrt(ssi * ssj))

			// Pearson correlation over the ntek technology classes.
			// The divisor must be the vector length (ntek), not cols(C),
			// which also counts the firm identifier column.
			rho = (cross - (si * sj) / ntek) / sqrt((ssi - si^2 / ntek) * (ssj - sj^2 / ntek))

			R[counter, 1] = C[i, 1]
			R[counter, 2] = C[j, 1]
			R[counter, 3] = cosval
			R[counter, 4] = rho

			counter = counter + 1

		} // END FOR j

	} // END FOR i

} // END COSINE

end

// LOOPING OVER THE YEARS
// For every year present in temp.dta, build the firm-by-technology matrix,
// let the Mata function cosine() fill a dataset with one observation per
// ordered pair of firms, and save it as cosine_<year>.dta.

use temp.dta, clear

// Iterate over the years actually observed, so that a gap in the series can
// never produce an empty (zero-observation) result set.
quietly levelsof year, local(years)

foreach y of local years {

	use temp.dta if year == `y', clear
	set more off
	drop year

	// Remaining columns: gvkey followed by one nbpat* column per technology
	mata: allval = st_data( ., .)

	// One output observation per ordered pair of firms
	count
	local obs = r(N) * r(N)

	// Empty result dataset that cosine() fills through a view.
	// The variables are stored as double: cosine() computes in double
	// precision and a default (float) variable would truncate the results.
	// Variable order matters: cosine() writes by column position.
	drop _all
	gen double gvkey1 = .
	gen double gvkey2 = .
	gen double cosine = .
	gen double c_techno = .

	set obs `obs'

	mata: cosine(allval)

	// TAGGING THE RESULTS WITH THEIR YEAR

	gen year = `y'

	// SAVING YEAR COSINE

	save cosine_`y'.dta, replace

} // END FOREACH y

// Release the (potentially large) Mata copy of the last year's data
capture mata: mata drop allval

// APPENDING ALL COSINE DATASETS

clear
foreach y of local years {
	append using cosine_`y'.dta
}

// Remove exact duplicates and the trivial pairs of a firm with itself
duplicates drop
drop if gvkey1 == gvkey2

// Rescale the correlation linearly so that its observed minimum maps to -1
// and its observed maximum maps to +1
summarize c_techno, meanonly
gen double corr_techno = ((2 * c_techno) - (r(min) + r(max)))/ (r(max) - r(min))

// AND SAVING

rename cosine cosine_techno

save cosine_techno.dta, replace

// CLEANING DIRECTORY
// Remove the intermediate files created by this do-file

erase temp.dta
foreach y of local years {
	erase cosine_`y'.dta
}
